#!/bin/bash

# Base checkpoint that the training command below passes as --model_name_or_path.
model_path="/data/nfs-ten1/nfs/meichaoyang001/model/Qwen1.5-14B-Chat_240216"

# Weights & Biases reporting is switched off for this run.
#
# SECURITY: a W&B API key used to be hard-coded (and exported) at this spot.
# Credentials must not live in scripts; that key should be treated as leaked
# and rotated. If W&B is ever re-enabled, provide WANDB_API_KEY through the
# caller's environment (or `wandb login`) -- a value that is already exported
# by the caller is inherited by the training processes without any help from
# this script.
export WANDB_DISABLED=true


# Load conda's bash integration into this non-interactive shell so that
# `conda activate` is available, then switch to the training environment.
#
# Both steps abort the script on failure: without these checks a missing conda
# or a missing environment would be ignored and the training job further down
# would start with whatever interpreter and packages happen to be on PATH.
conda_hook="$(conda shell.bash hook)" || {
    echo "error: 'conda shell.bash hook' failed; is conda installed and on PATH?" >&2
    exit 1
}
eval "$conda_hook"
conda activate /data/nfs-ten1/nfs/meichaoyang001/envs/llama_240508_cuda12_2 || {
    echo "error: could not activate the conda training environment" >&2
    exit 1
}


# Launch full-parameter SFT of the checkpoint in $model_path with DeepSpeed on
# 8 GPUs. src/train.py, conf/ and checkpoint/ are relative paths, so they are
# resolved against the directory this script is started from.

# Fail early with a clear message: an empty value would otherwise make
# --model_name_or_path swallow the next flag as its argument.
: "${model_path:?model_path must be set to the base checkpoint directory}"

# Dataset identifier; it is also embedded in the output directory name, so it
# is defined once here to keep the two from drifting apart.
dataset_name="240131_gpt4_187971_with_1127_gpt3_5_json_1955_with_1030_with_alpaca_gpt4_data_zh3w_wizard_zh3w_en2w_shuffle"
output_dir="checkpoint/qwen_1.5_14b_chat_full_sft_${dataset_name}_bs128_lr2e-5"

# Trainer arguments are kept in an array so that every value stays one word
# regardless of its content and individual options can carry comments.
train_args=(
    --deepspeed conf/ds_stage3_config_qwen_no_offload.json
    --stage sft
    --model_name_or_path "$model_path"
    --do_train
    --dataset "$dataset_name"
    --preprocessing_num_workers 16
    --template qwen
    --finetuning_type full
    --output_dir "$output_dir"
    --overwrite_cache true
    # 8 GPUs x 1 sample per device x 16 accumulation steps = 128, which is the
    # "bs128" in the output directory name (assumes one data-parallel process
    # per GPU -- confirm). Update the name if any of these values change.
    --per_device_train_batch_size 1
    --gradient_accumulation_steps 16
    --lr_scheduler_type cosine
    --logging_steps 10
    --warmup_steps 100
    --save_steps 2000
    --save_only_model
    # Matches the "lr2e-5" suffix of the output directory name.
    --learning_rate 2e-5
    --num_train_epochs 3.0
    --plot_loss
    --bf16
    --flash_attn fa2
    # NOTE(review): ignore_len is 10240 while cutoff_len is 102400 (ten times
    # larger) -- confirm that both values are intended.
    --ignore_len 10240
    --cutoff_len 102400
)

deepspeed --num_gpus 8 --master_port=9901 src/train.py "${train_args[@]}"
